We start with the microSubt phyloseq object created in the Analysis-OleateMicrobiota file. We create a csv file containing the major butyrate-producing microbiota reported in literature called butyrate.csv. This file was created from the silva v138.1 taxonomy file. Then, we filter the microSubt with genera from butyrate.csv.
# load libraries
library("phyloseq")
library("ggplot2")
library("plyr")
library("vegan")
library("grid")
library("directlabels")
library("knitr")
library("clustsig")
library("ellipse")
library("ape")
library("DESeq2")
library("microbial")
library("VennDiagram")
library("microbiome")
library("microbiomeutilities")
## Warning: replacing previous import 'ggplot2::alpha' by 'microbiome::alpha' when
## loading 'microbiomeutilities'
library("viridis")
library("RColorBrewer")
library("ggpubr")
library("patchwork")
## import data
sharedFile = read.table("oleate.opti_mcc.shared")
sharedFile = t(sharedFile) ## transform data
rownames(sharedFile) = sharedFile[,1] ## define rowname
colnames(sharedFile) = sharedFile[2,] ## define column names
sharedFile <-as.matrix(sharedFile)
sharedFile = sharedFile[,2:57] ## only include columns that reflects OTU numbers (remember order [row,colum]) 2 -> 57 for me
## as I had 57 samples
sharedFile = sharedFile[4:819,] ## and which rows you want. I have 819 OTUs so I changed this number to 819
# original => sharedFile = sharedFile[4:819,]
class(sharedFile) <- "numeric"
head(sharedFile) ## look at head first 7 lines to see it's made correclty
## ce4406 ce4407 ce4408 ce4410 ce4411 ce4412 ce4413 ce4414 ce4415 ce4416
## Otu001 648 490 686 411 726 838 761 175 144 1520
## Otu002 1 143 1693 1037 794 548 356 418 149 1075
## Otu003 867 743 1670 733 1171 559 953 165 202 564
## Otu004 310 8 1379 808 903 419 1487 953 46 192
## Otu005 483 704 1357 2703 825 735 381 416 188 2496
## Otu006 0 0 0 1 170 2 4 49 40 365
## ce4417 ce4418 ce4419 ce4420 ce4421 ce4422 ce4423 ce4424 co4406 co4407
## Otu001 1118 1091 1691 703 936 1011 54 1452 1170 257
## Otu002 2412 6002 1290 586 1216 1117 2346 48 5 843
## Otu003 1201 1204 871 980 890 922 1522 388 406 1539
## Otu004 29 291 1469 21 6 4 3 20 1066 14
## Otu005 2134 969 1805 1737 1385 1602 1220 391 292 188
## Otu006 123 1317 97 245 237 272 146 2061 3 2
## co4408 co4410 co4411 co4412 co4413 co4414 co4415 co4416 co4417 co4418
## Otu001 1823 856 397 705 356 392 45 201 4331 356
## Otu002 767 177 220 404 24 442 261 1187 1901 1269
## Otu003 1116 517 909 996 394 499 490 498 664 1311
## Otu004 1089 564 518 468 660 1151 27 133 481 13
## Otu005 264 400 795 778 310 875 168 31 804 483
## Otu006 0 0 59 1 1 110 19 195 1149 66
## co4419 co4420 co4421 co4422 co4423 co4424 co4428 f4406 f4407 f4408 f4410
## Otu001 182 188 1372 944 1621 233 1166 7021 9127 2210 3181
## Otu002 2380 1814 815 390 778 2231 10 31 1790 14 173
## Otu003 567 499 527 791 1318 1249 225 1108 1447 679 726
## Otu004 31 484 35 0 18 4 6 1221 2604 386 2143
## Otu005 281 212 501 519 584 237 148 1300 380 1279 628
## Otu006 240 39 280 237 667 376 1572 3 0 0 1
## f4411 f4412 f4413 f4414 f4415 f4416 f4417 f4418 f4419 f4420 f4421 f4422
## Otu001 6323 8943 2011 4584 4530 7401 3309 6001 7938 4862 1619 1334
## Otu002 1367 30 99 2063 1193 898 472 265 3751 3667 1244 1022
## Otu003 1242 1133 1532 1163 687 902 536 443 1878 554 1781 1668
## Otu004 2441 716 1082 4784 4027 3709 3814 2177 1569 2593 80 21
## Otu005 1154 1847 1024 816 591 1263 1511 521 1505 1013 272 558
## Otu006 786 1 0 1648 862 1234 1175 931 2214 1074 2252 1979
## f4423 f4424 f4428
## Otu001 3749 11842 791
## Otu002 1421 1194 996
## Otu003 610 2459 971
## Otu004 147 23 5
## Otu005 1367 64 324
## Otu006 798 506 1765
## Import subsampled otu matrix (26880 seqs)
sharedsubFile = read.table('oleate.opti_mcc.0.03.subsample.shared')
sharedsubFile = t(sharedsubFile)
rownames(sharedsubFile) = sharedsubFile[,1]
colnames(sharedsubFile) = sharedsubFile[2,]
sharedsubFile = sharedsubFile[,2:57]
sharedsubFile = sharedsubFile[4:494,]
class(sharedsubFile) <- "numeric"
head(sharedsubFile)
## ce4406 ce4407 ce4408 ce4410 ce4411 ce4412 ce4413 ce4414 ce4415 ce4416
## Otu001 335 271 174 72 204 358 233 131 144 312
## Otu002 1 68 361 199 235 236 103 311 149 237
## Otu003 468 380 379 139 359 254 289 125 202 108
## Otu004 145 2 312 128 261 186 453 680 46 37
## Otu005 245 348 299 492 254 319 124 308 188 509
## Otu006 0 0 0 0 38 1 1 31 40 84
## ce4417 ce4418 ce4419 ce4420 ce4421 ce4422 ce4423 ce4424 co4406 co4407
## Otu001 187 161 311 109 212 197 10 137 456 145
## Otu002 414 825 219 86 255 194 590 3 2 528
## Otu003 213 166 156 157 197 176 380 30 160 916
## Otu004 6 38 284 3 0 1 0 1 407 9
## Otu005 412 134 311 262 300 295 299 34 128 109
## Otu006 20 173 20 46 47 44 30 184 2 2
## co4408 co4410 co4411 co4412 co4413 co4414 co4415 co4416 co4417 co4418
## Otu001 473 406 172 289 160 168 41 127 611 138
## Otu002 224 82 101 156 11 181 240 770 281 465
## Otu003 304 272 445 419 180 214 459 308 85 543
## Otu004 317 258 253 167 321 469 27 86 69 7
## Otu005 83 202 400 294 147 373 155 19 105 172
## Otu006 0 0 24 0 0 44 18 121 169 18
## co4419 co4420 co4421 co4422 co4423 co4424 co4428 f4406 f4407 f4408 f4410
## Otu001 84 80 358 335 340 72 237 933 1161 548 615
## Otu002 873 792 208 118 166 744 1 4 230 2 29
## Otu003 212 202 130 290 274 413 48 141 199 168 115
## Otu004 18 223 9 0 4 1 2 151 360 105 410
## Otu005 111 73 132 184 122 82 24 154 52 311 113
## Otu006 95 14 68 96 126 127 293 0 0 0 0
## f4411 f4412 f4413 f4414 f4415 f4416 f4417 f4418 f4419 f4420 f4421 f4422
## Otu001 929 1220 419 585 727 779 584 1196 788 533 250 228
## Otu002 196 5 22 259 167 105 74 49 390 416 224 144
## Otu003 171 167 296 144 124 95 99 87 195 75 271 261
## Otu004 384 99 204 639 655 415 673 421 145 281 15 5
## Otu005 179 283 209 99 101 125 256 93 145 131 40 87
## Otu006 106 0 0 177 135 128 212 197 229 116 332 338
## f4423 f4424 f4428
## Otu001 401 1571 135
## Otu002 158 159 161
## Otu003 65 348 150
## Otu004 12 3 0
## Otu005 137 11 59
## Otu006 95 66 275
dim(sharedsubFile)
## [1] 491 56
sharedsubFile <-as.matrix(sharedsubFile)
## Import taxonomy file
## Copy and paste into notepad and save. Then carry on:
taxFile = read.table('oleate.taxonomy', header=T, sep='\t')
rownames(taxFile) = taxFile[,1]
taxFile = taxFile[,3:8]
taxFile = as.matrix(taxFile)
head(taxFile)
## Kingdom Phylum Class Order
## Otu001 "Bacteria" "Firmicutes" "Bacilli" "Lactobacillales"
## Otu002 "Bacteria" "Verrucomicrobiota" "Verrucomicrobiae" "Verrucomicrobiales"
## Otu003 "Bacteria" "Bacteroidota" "Bacteroidia" "Bacteroidales"
## Otu004 "Bacteria" "Firmicutes" "Bacilli" "Lactobacillales"
## Otu005 "Bacteria" "Firmicutes" "Clostridia" "Lachnospirales"
## Otu006 "Bacteria" "Firmicutes" "Bacilli" "Erysipelotrichales"
## Family Genus
## Otu001 "Streptococcaceae" "Lactococcus"
## Otu002 "Akkermansiaceae" "Akkermansia"
## Otu003 "Bacteroidaceae" "Bacteroides"
## Otu004 "Lactobacillaceae" "Lactobacillus"
## Otu005 "Lachnospiraceae" "Lachnospiraceae"
## Otu006 "Erysipelotrichaceae" "Dubosiella"
## import metadata file
metaFile = read.table('oleate.metadata', header=T, sep='\t')
rownames(metaFile) = metaFile[,1]
metaFile = metaFile[,1:4]
head(metaFile)
## group site mouse condition
## ce4406 ce4406 cecum 4406 10%Fat
## ce4407 ce4407 cecum 4407 10%Fat
## ce4408 ce4408 cecum 4408 10%Fat
## ce4410 ce4410 cecum 4410 10%Fat
## ce4411 ce4411 cecum 4411 10%Fat
## ce4412 ce4412 cecum 4412 10%Fat
## Create phyloseq object
OTU = otu_table(sharedFile, taxa_are_rows = TRUE)
OTUsub = otu_table(sharedsubFile, taxa_are_rows = TRUE)
TAX = tax_table(taxFile)
META = sample_data(metaFile)
physeq = phyloseq(OTU, TAX, META)
physeqSub = phyloseq(OTUsub, TAX, META)
physeqSub
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 491 taxa and 56 samples ]
## sample_data() Sample Data: [ 56 samples by 4 sample variables ]
## tax_table() Taxonomy Table: [ 491 taxa by 6 taxonomic ranks ]
## Get rid of any OTUs not present in any samples and get relative abundance
microSub <- prune_taxa(taxa_sums(physeqSub) > 0, physeqSub)
microSubRel = transform_sample_counts(microSub, function(x) x / sum(x) )
microSubRelFilt = filter_taxa(microSubRel, function(x) mean(x) > 1e-5, TRUE)
# for subsampled shared file
# create a tree and a new phyloseq object
random_tree = rtree(ntaxa(microSub), rooted=TRUE, tip.label=taxa_names(microSub))
plot(random_tree)
microSubt = merge_phyloseq(microSub, random_tree)
but <- read.csv("butyrate.csv", header = TRUE, sep = ",")
head(but$Genus)
## [1] "Anaerostipes" "Anaerostipes" "Anaerostipes" "Anaerostipes" "Anaerostipes"
## [6] "Anaerostipes"
microBut <- subset_taxa(microSubt, Genus %in% but$Genus)
microBut
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 92 taxa and 56 samples ]
## sample_data() Sample Data: [ 56 samples by 4 sample variables ]
## tax_table() Taxonomy Table: [ 92 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 92 tips and 91 internal nodes ]
# Plotting rarefaction
rarecurve(t(otu_table(microBut)), step=50, cex=0.5)
# Plotting taxonomy
plot_bar(microBut, fill="Phylum") + facet_wrap(~condition, scales = "free_x", nrow = 1)+theme(text = element_text(size=16))+geom_bar(stat="identity")+
scale_fill_brewer(type = "seq", palette = "Spectral")+
theme(
legend.background = element_rect(
fill = "lemonchiffon",
colour = "grey50",
size = 1
)
)
plot_bar(microBut, fill="Phylum") + facet_wrap(~site, scales = "free_x", nrow = 1)+theme(text = element_text(size=16))+geom_bar(stat="identity")+
scale_fill_brewer(type = "seq", palette = "Spectral")+
theme(
legend.background = element_rect(
fill = "lemonchiffon",
colour = "grey50",
size = 1
)
)
plot_bar(microBut, fill="Phylum")+facet_grid(condition~site, scales = "free")+theme(text = element_text(size=16))+geom_bar(stat="identity")+
scale_fill_brewer(type = "seq", palette = "Spectral")+
theme(
legend.background = element_rect(
fill = "lemonchiffon",
colour = "grey50",
size = 1
)
)
# Plotting PCoA
my.PCoA2 <- ordinate(microBut, "PCoA", "bray")
plot_ordination(microBut, my.PCoA2, type = "group", color = "condition", shape = "site")+theme(text = element_text(size=20))+ geom_point(size=5) + scale_color_manual(values=c("brown3", "steelblue","grey50"))+
geom_text(label= "group", size=2)
## Warning in plot_ordination(microBut, my.PCoA2, type = "group", color = "condition", : type argument not supported. `type` set to 'samples'.
## See `plot_ordination('list')`
plot_ordination(microBut, my.PCoA2, type = "group", color = "condition")+ facet_wrap(~site, scales = "free_x", nrow = 1) + geom_point(size=5)+theme(text = element_text(size=20)) + scale_color_manual(values=c("brown3", "steelblue","grey50"))
## Warning in plot_ordination(microBut, my.PCoA2, type = "group", color = "condition"): type argument not supported. `type` set to 'samples'.
## See `plot_ordination('list')`
plot_ordination(microBut, my.PCoA2, type = "group", color = "condition", shape = "site")+theme(text = element_text(size=20))+ geom_point(size=5)+
geom_line() + scale_color_manual(values=c("brown3", "steelblue","grey50"))
## Warning in plot_ordination(microBut, my.PCoA2, type = "group", color = "condition", : type argument not supported. `type` set to 'samples'.
## See `plot_ordination('list')`
plot_ordination(microBut, my.PCoA2, type = "group", color = "condition")+theme(text = element_text(size=20))+ geom_point(size=5)+ stat_ellipse(level=0.9) + scale_color_manual(values=c("brown3", "steelblue","grey50"))
## Warning in plot_ordination(microBut, my.PCoA2, type = "group", color = "condition"): type argument not supported. `type` set to 'samples'.
## See `plot_ordination('list')`
# PCoA plot using the unweighted UniFrac as distance
wunifrac_dist = phyloseq::distance(microBut, method="unifrac", weighted=F)
ordination = ordinate(microBut, method="PCoA", distance=wunifrac_dist)
plot_ordination(microBut, ordination, color="condition") + theme(aspect.ratio=1)+
ggtitle("PCoA: unweigthed Unifrac")+geom_point(size=5) + scale_color_manual(values=c("brown3", "steelblue","grey50"))
#Plot PCoA using the weighted UniFrac as distance
wunifrac_dist = phyloseq::distance(microBut, method="unifrac", weighted=T)
ordUF = ordinate(microBut, method="PCoA", distance=wunifrac_dist)
plot_ordination(microBut, ordUF, color = "condition") +
ggtitle("PCoA: weigthed Unifrac")+geom_point(size=5) + scale_color_manual(values=c("brown3", "steelblue","grey50"))
# Normalization
phy <- normalize(microBut, method = "relative")
## Normalization using relative method
# Plot taxonomy by group
plotbar(phy,level="Phylum", group="condition") + theme(axis.text.x = element_text(size = 16))
plotbar(phy,level="Phylum", group="site") + theme(axis.text.x = element_text(size = 16))
# Plot alpha diversity
plotalpha(microBut, group = "condition", color = c("brown3","grey50", "steelblue"))
plotalpha(microBut, group = "site")
## No significant difference between any of the groups
# Test for significance between groups
beta_condition <-betatest(phy,group="condition")
## Do PERMANOVA for: condition
beta_site <-betatest(phy,group="site")
## Do PERMANOVA for: site
# Biomarkers discovery and plotting
bio <- biomarker(microBut,group="condition")
## Normalization using relative method
plotmarker(bio,level="Genus")
# LefSe testing and plotting
lda3 <- ldamarker(microBut,group="condition")
## Normalization using relative method
lda3
## # A tibble: 42 x 14
## # Groups: rank, tax [42]
## rank tax statistic p.value parameter method p.adj `10%Fat` `60%Fat`
## <chr> <chr> <dbl> <dbl> <int> <chr> <dbl> <dbl> <dbl>
## 1 Class Bacteri… 6.25 4.39e-2 2 Kruska… 6.92e-2 2912. 112564.
## 2 Class Bacteri… 6.72 3.47e-2 2 Kruska… 6.92e-2 457315. 117648.
## 3 Class Bacteri… 3.09 2.13e-1 2 Kruska… 2.50e-1 NA NA
## 4 Family Bacteri… 6.25 4.39e-2 2 Kruska… 6.92e-2 2912. 112564.
## 5 Family Bacteri… 7.14 2.82e-2 2 Kruska… 6.42e-2 418131. 79221.
## 6 Family Bacteri… 8.65 1.32e-2 2 Kruska… 3.87e-2 0 5937.
## 7 Family Bacteri… 2.79 2.47e-1 2 Kruska… 2.60e-1 NA NA
## 8 Family Bacteri… 3.87 1.44e-1 2 Kruska… 1.97e-1 NA NA
## 9 Family Bacteri… 35.6 1.88e-8 2 Kruska… 2.57e-7 14337. 2215068.
## 10 Family Bacteri… 29.9 3.24e-7 2 Kruska… 2.21e-6 3906047. 2229724.
## # … with 32 more rows, and 5 more variables: Oleate <dbl>, max <dbl>,
## # min <dbl>, LDAscore <dbl>, direction <chr>
write.csv(lda3, "lda3.csv", row.names = FALSE)
lda4 <- read.csv(file = 'lda4.csv')
plotLDA(lda4,group=c("10%Fat","60%Fat"),lda=5,pvalue=0.05)+theme(axis.text.y = element_text(size = 16))+theme(text = element_text(size=20))
plotLDA(lda4,group=c("10%Fat","Oleate"),lda=5,pvalue=0.05)+theme(axis.text.y = element_text(size = 16))+theme(text = element_text(size=20))
plotLDA(lda4,group=c("Oleate","60%Fat"),lda=5,pvalue=0.05) +theme(axis.text.y = element_text(size = 16))+theme(text = element_text(size=20))
## Modified difftest function from microbial package to test for significance
# create difftest2 function
difftest2<-function(physeq,group,pvalue=0.05,padj=NULL,log2FC=0,gm_mean=TRUE,fitType="local",quiet=FALSE){
if(!taxa_are_rows(physeq)){
physeq<-t(physeq)
}
otu <- as(otu_table(physeq),"matrix")
tax <- as.data.frame(as.matrix(tax_table(physeq)))
colData<-as(sample_data(physeq),"data.frame")
colData$condition<-colData[,group]
contrasts<-levels(factor(unique(colData$condition)))[1:2]
if(isTRUE(gm_mean)){
countData<-round(otu, digits = 0)
}else{
countData<-round(otu, digits = 0)+1
}
dds <- DESeqDataSetFromMatrix(countData, colData, as.formula(~condition))
if(isTRUE(gm_mean)){
geoMeans = apply(counts(dds), 1, gm_mean)
dds = estimateSizeFactors(dds, geoMeans = geoMeans)
}
dds <- DESeq(dds, fitType=fitType)
res <- results(dds,contrast=c("condition",contrasts),cooksCutoff = FALSE)
res_tax = cbind(as.data.frame(res), as.matrix(countData[rownames(res), ]))
if(!is.null(padj)){
pval<-padj
sig <- rownames(subset(res,padj<pval&abs(log2FoldChange)>log2FC))
}else{
pval<-pvalue
sig <- rownames(subset(res,pvalue<pval&abs(log2FoldChange)>log2FC))
}
res_tax$Significant<- "No"
res_tax$Significant <- ifelse(rownames(res_tax) %in% sig, "Yes", "No")
res_tax <- cbind(res_tax, tax[rownames(res),])
return(as.data.frame(res_tax))
}
# Test for significant bacteria using microbial package
mic_res <- difftest2(microBut,group="condition", gm_mean=FALSE)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## estimating size factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## -- replacing outliers and refitting for 2 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
plotdiff(mic_res,level="Genus",padj=0.001,log2FC = 3,fontsize.y = 14)
## Test for significant OTUs using DESeq2
sample_data(microBut)$condition <- as.factor(sample_data(microBut)$condition)
ds = phyloseq_to_deseq2(microBut, ~ condition)
## converting counts to integer mode
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
ds = DESeq(ds)
## estimating size factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## -- replacing outliers and refitting for 2 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
alpha = 0.01
# compare Oleate and 60%Fat
res = results(ds, contrast=c("condition", "Oleate", "60%Fat"), alpha=alpha)
res = res[order(res$padj, na.last=NA), ]
res_sig = res[(res$padj < alpha), ]
res_sig
## log2 fold change (MLE): condition Oleate vs 60%Fat
## Wald test p-value: condition Oleate vs 60%Fat
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## Otu028 31.63472 5.66889 0.751470 7.54374 4.56671e-14 4.20138e-12
## Otu031 27.91385 3.69359 0.552382 6.68665 2.28335e-11 1.05034e-09
## Otu075 5.48373 4.18206 0.753635 5.54919 2.87003e-08 8.80143e-07
## Otu052 11.59919 2.40842 0.493055 4.88470 1.03587e-06 2.38251e-05
## Otu029 27.86040 -4.29768 0.908277 -4.73168 2.22666e-06 4.09706e-05
## Otu097 1.10108 -4.44731 1.032066 -4.30913 1.63897e-05 2.51309e-04
res_sig = cbind(as(res_sig, "data.frame"), as(tax_table(microBut)[rownames(res_sig), ], "matrix"))
ggplot(res_sig, aes(x=Genus, y=log2FoldChange, color=Phylum)) +
geom_jitter(size=5, width = 0.2) + theme(text = element_text(size=16)) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
# compare 10%Fat and 60%Fat
res2 = results(ds, contrast=c("condition", "10%Fat", "60%Fat"), alpha=alpha)
res2 = res2[order(res2$padj, na.last=NA), ]
res_sig2 = res2[(res2$padj < alpha), ]
res_sig2
## log2 fold change (MLE): condition 10%Fat vs 60%Fat
## Wald test p-value: condition 10%Fat vs 60%Fat
## DataFrame with 7 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## Otu029 27.86040 -7.83160 0.985919 -7.94345 1.96632e-15 1.80901e-13
## Otu028 31.63472 5.48619 0.751341 7.30186 2.83816e-13 1.30555e-11
## Otu075 5.48373 4.19824 0.753127 5.57441 2.48367e-08 7.61660e-07
## Otu031 27.91385 3.03387 0.553472 5.48153 4.21672e-08 9.69845e-07
## Otu097 1.10108 -4.22887 1.042150 -4.05783 4.95301e-05 9.11354e-04
## Otu052 11.59919 1.97014 0.495261 3.97798 6.95032e-05 1.06572e-03
## Otu077 4.86304 2.49440 0.724857 3.44124 5.79065e-04 7.61057e-03
res_sig2 = cbind(as(res_sig2, "data.frame"), as(tax_table(microBut)[rownames(res_sig2), ], "matrix"))
ggplot(res_sig2, aes(x=Genus, y=log2FoldChange, color=Phylum)) +
geom_jitter(size=5, width = 0.2) + theme(text = element_text(size=16)) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
# compare 10%Fat and Oleate
res3 = results(ds, contrast=c("condition", "10%Fat", "Oleate"), alpha=alpha)
res3 = res3[order(res3$padj, na.last=NA), ]
res_sig3 = res3[(res3$padj < alpha), ]
res_sig3
## log2 fold change (MLE): condition 10%Fat vs Oleate
## Wald test p-value: condition 10%Fat vs Oleate
## DataFrame with 4 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## Otu055 6.82993 -2.78274 0.568278 -4.89679 9.74142e-07 8.96211e-05
## Otu041 13.67379 -2.08072 0.509738 -4.08193 4.46630e-05 2.05450e-03
## Otu029 27.86040 -3.53391 0.915537 -3.85994 1.13417e-04 3.47811e-03
## Otu060 8.21605 2.33845 0.634518 3.68540 2.28344e-04 5.25190e-03
res_sig3 = cbind(as(res_sig3, "data.frame"), as(tax_table(microBut)[rownames(res_sig3), ], "matrix"))
ggplot(res_sig3, aes(x=Genus, y=log2FoldChange, color=Phylum)) +
geom_jitter(size=5, width = 0.2) + theme(text = element_text(size=16)) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5))
## Plot heatmap
plot_taxa_heatmap(microBut,
subset.top = 20,
VariableA = "condition",
heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
transformation = "log10"
)
## Top 20 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.
## Venn Diagram
pseq<-microBut
table(meta(pseq)$condition, useNA = "always")
##
## 10%Fat 60%Fat Oleate <NA>
## 21 14 21 0
# convert to relative abundances
pseq.rel <- microbiome::transform(pseq, "compositional")
disease_states <- unique(as.character(meta(pseq.rel)$condition))
print(disease_states)
## [1] "10%Fat" "Oleate" "60%Fat"
#Write a for loop to go through each of the disease_states one by one and combine identified core taxa into a list.
list_core <- c() # an empty object to store information
for (n in disease_states){ # for each variable n in DiseaseState
#print(paste0("Identifying Core Taxa for ", n))
ps.sub <- subset_samples(pseq.rel, condition == n) # Choose sample from DiseaseState by n
core_m <- core_members(ps.sub, # ps.sub is phyloseq selected with only samples from g
detection = 0.001, # 0.001 in atleast 90% samples
prevalence = 0.75)
print(paste0("No. of core taxa in ", n, " : ", length(core_m))) # print core taxa identified in each DiseaseState.
list_core[[n]] <- core_m # add to a list core taxa for each group.
#print(list_core)
}
## [1] "No. of core taxa in 10%Fat : 19"
## [1] "No. of core taxa in Oleate : 16"
## [1] "No. of core taxa in 60%Fat : 22"
print(list_core)
## $`10%Fat`
## [1] "Otu061" "Otu077" "Otu080" "Otu052" "Otu012" "Otu041" "Otu063" "Otu075"
## [9] "Otu026" "Otu050" "Otu031" "Otu070" "Otu044" "Otu028" "Otu018" "Otu048"
## [17] "Otu037" "Otu084" "Otu060"
##
## $Oleate
## [1] "Otu061" "Otu077" "Otu052" "Otu012" "Otu041" "Otu063" "Otu075" "Otu026"
## [9] "Otu055" "Otu031" "Otu070" "Otu044" "Otu028" "Otu048" "Otu037" "Otu060"
##
## $`60%Fat`
## [1] "Otu078" "Otu079" "Otu029" "Otu052" "Otu012" "Otu083" "Otu041" "Otu063"
## [9] "Otu026" "Otu050" "Otu055" "Otu031" "Otu070" "Otu044" "Otu018" "Otu048"
## [17] "Otu097" "Otu037" "Otu084" "Otu076" "Otu060" "Otu089"
# Specify colors and plot venn
# supplying colors in the order they appear in list_core
mycols <- c("brown3", "grey50", "steelblue")
venn.diagram(list_core,fill = mycols, filename = 'But_venn_diagramm.png',
output=TRUE)
## [1] 1
pa <- aggregate_taxa(microBut, "Genus")
top_four <- top_taxa(pa, 4)
top_four
## [1] "Bacteria_Firmicutes_Clostridia_Oscillospirales_Oscillospiraceae_uncultured"
## [2] "Bacteria_Firmicutes_Clostridia_Lachnospirales_Lachnospiraceae_uncultured"
## [3] "Lachnoclostridium"
## [4] "Oscillibacter"
mycols <- c("brown3", "steelblue", "grey50")
p <- plot_listed_taxa(pa, top_four,
group= "condition",
group.order = c("10%Fat","Oleate","60%Fat"),
group.colors = mycols,
add.violin = TRUE,
violin.opacity = 0.3,
dot.opacity = 0.25,
box.opacity = 0.25,
panel.arrange= "wrap")
## An additonal column Sam_rep with sample names is created for reference purpose
comps <- make_pairs(sample_data(pa)$condition)
p <- p + stat_compare_means(
comparisons = comps,
label = "p.format",
tip.length = 0.05,
method = "wilcox.test")
print(p + ylab("Relative abundance") + scale_y_continuous(labels = scales::percent))
## Get taxa summary by group(s)
p0 <- microSubt
p0.gen <- aggregate_taxa(microBut,"Genus")
x.d <- dominant_taxa(p0,level = "Genus", group="condition")
head(x.d$dominant_overview)
## # A tibble: 6 x 5
## # Groups: condition [3]
## condition dominant_taxa n rel.freq rel.freq.pct
## <chr> <chr> <int> <dbl> <chr>
## 1 10%Fat Lachnospiraceae 14 66.7 67%
## 2 60%Fat Lachnospiraceae 9 64.3 64%
## 3 Oleate Lachnospiraceae 9 42.9 43%
## 4 10%Fat Lactococcus 6 28.6 29%
## 5 Oleate Lactococcus 6 28.6 29%
## 6 Oleate Akkermansia 4 19 19%
tx.sum1 <- taxa_summary(p0, "Phylum")
## Data provided is not compositional
## will first transform
p0.rel <- microbiome::transform(p0, "compositional")
grp_abund <- get_group_abundances(p0.rel,
level = "Phylum",
group="condition",
transform = "compositional")
## An additonal column Sam_rep with sample names is created for reference purpose
mycols <- c("brown3", "steelblue","grey50")
# clean names
grp_abund$OTUID <- gsub("p__", "",grp_abund$OTUID)
grp_abund$OTUID <- ifelse(grp_abund$OTUID == "",
"Unclassified", grp_abund$OTUID)
mean.plot <- grp_abund %>% # input data
ggplot(aes(x= reorder(OTUID, mean_abundance), # reroder based on mean abundance
y= mean_abundance,
fill=condition)) + # x and y axis
geom_bar( stat = "identity",
position=position_dodge()) +
scale_fill_manual("condition", values=mycols) + # manually specify colors
theme_bw() + # add a widely used ggplot2 theme
ylab("Mean Relative Abundance") + # label y axis
xlab("Phylum") + # label x axis
coord_flip() # rotate plot
mean.plot
## Plotting density of reads per sample
p0 <- microBut
print_ps(p0)
## 01] ntaxa = 92
## 02] nsamples = 56
## 03] nsamplesvariables = 4
## 04] nranks = 6
## 05] Min. number of reads = 93
## 06] Max. number of reads = 553
## 07] Total number of reads = 16479
## 08] Average number of reads = 294.27
## 09] Median number of reads = 296
## 10] Sparsity = 0.699340062111801
## 11] Number of singletons = 29
## 12] % of taxa that are singletons
## (i.e. exactly one read detected across all samples) = 31.5217391304348
kable(head(tax_table(p0)))
| Kingdom | Phylum | Class | Order | Family | Genus | |
|---|---|---|---|---|---|---|
| Otu120 | Bacteria | Firmicutes | Bacilli | Lactobacillales | Enterococcaceae | Enterococcus |
| Otu159 | Bacteria | Firmicutes | Clostridia | Lachnospirales | Lachnospiraceae | Lachnoclostridium |
| Otu437 | Bacteria | Firmicutes | Clostridia | Lachnospirales | Lachnospiraceae | uncultured |
| Otu061 | Bacteria | Firmicutes | Clostridia | Lachnospirales | Lachnospiraceae | uncultured |
| Otu360 | Bacteria | Firmicutes | Clostridia | Oscillospirales | Oscillospiraceae | uncultured |
| Otu078 | Bacteria | Firmicutes | Clostridia | Lachnospirales | Lachnospiraceae | uncultured |
# reduce size for example
ps0 <- core(p0, detection = 10, prevalence = 20 / 100)
# Add a prefix to taxa labels
ps0.f2 <- format_to_besthit(ps0, prefix = "MyBug-")
kable(head(tax_table(ps0.f2))[3:6])
| Domain | Phylum | Class | Order | Family | Genus | best_hit | |
|---|---|---|---|---|---|---|---|
| MyBug-Otu012:uncultured | Bacteria | Firmicutes | Clostridia | Oscillospirales | Oscillospiraceae | uncultured | MyBug-Otu012:uncultured |
| MyBug-Otu041:Oscillibacter | Bacteria | Firmicutes | Clostridia | Oscillospirales | Oscillospiraceae | Oscillibacter | MyBug-Otu041:Oscillibacter |
| MyBug-Otu026:Lachnoclostridium | Bacteria | Firmicutes | Clostridia | Lachnospirales | Lachnospiraceae | Lachnoclostridium | MyBug-Otu026:Lachnoclostridium |
| MyBug-Otu050:uncultured | Bacteria | Firmicutes | Clostridia | Lachnospirales | Lachnospiraceae | uncultured | MyBug-Otu050:uncultured |
p <- plot_read_distribution(ps0.f2, groups = "condition",
plot.type = "density")
print(p + theme_biome_utils())
ps0 <- core(p0, detection = 10, prevalence = 20 / 100)
pseq_df <- phy_to_ldf(ps0, transform.counts = NULL)
## An additonal column Sam_rep with sample names is created for reference purpose
kable(head(pseq_df))
| OTUID | Kingdom | Phylum | Class | Order | Family | Genus | Sam_rep | Abundance | group | site | mouse | condition |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Otu029 | Bacteria | Firmicutes | Clostridia | Clostridiales | Clostridiaceae | Clostridium | ce4406 | 0 | ce4406 | cecum | 4406 | 10%Fat |
| Otu052 | Bacteria | Firmicutes | Clostridia | Lachnospirales | Lachnospiraceae | uncultured | ce4406 | 13 | ce4406 | cecum | 4406 | 10%Fat |
| Otu012 | Bacteria | Firmicutes | Clostridia | Oscillospirales | Oscillospiraceae | uncultured | ce4406 | 36 | ce4406 | cecum | 4406 | 10%Fat |
| Otu041 | Bacteria | Firmicutes | Clostridia | Oscillospirales | Oscillospiraceae | Oscillibacter | ce4406 | 1 | ce4406 | cecum | 4406 | 10%Fat |
| Otu026 | Bacteria | Firmicutes | Clostridia | Lachnospirales | Lachnospiraceae | Lachnoclostridium | ce4406 | 6 | ce4406 | cecum | 4406 | 10%Fat |
| Otu050 | Bacteria | Firmicutes | Clostridia | Lachnospirales | Lachnospiraceae | uncultured | ce4406 | 0 | ce4406 | cecum | 4406 | 10%Fat |
# Plot Density of phyla
pseq <- microBut
# check 10%Fat
control_ps <- subset_samples(pseq, condition=="10%Fat")
p_hc <- taxa_distribution(control_ps) +
theme_biome_utils() +
labs(title = "10%Fat")
# check Oleate
oleate_ps <- subset_samples(pseq, condition=="Oleate")
p_oleate <- taxa_distribution(oleate_ps) +
theme_biome_utils() +
labs(title = "Oleate")
# check 60%Fat
HFD_ps <- subset_samples(pseq, condition=="60%Fat")
p_hfd <- taxa_distribution(HFD_ps) +
theme_biome_utils() +
labs(title = "60%Fat")
# harnessing the power of patchwork
p_hc / p_oleate / p_hfd + plot_layout(guides = "collect") +
plot_annotation(tag_levels = "A")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 27 rows containing non-finite values (stat_density).
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 27 rows containing non-finite values (stat_density).
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 31 rows containing non-finite values (stat_density).
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Removed 1 row(s) containing missing values (geom_path).
# Session Info
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 grid stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] patchwork_1.1.1 ggpubr_0.4.0
## [3] RColorBrewer_1.1-2 viridis_0.5.1
## [5] viridisLite_0.3.0 microbiomeutilities_1.00.15
## [7] dplyr_1.0.4 microbiome_1.12.0
## [9] VennDiagram_1.6.20 futile.logger_1.4.3
## [11] microbial_0.0.17 DESeq2_1.30.1
## [13] SummarizedExperiment_1.20.0 Biobase_2.50.0
## [15] MatrixGenerics_1.2.1 matrixStats_0.58.0
## [17] GenomicRanges_1.42.0 GenomeInfoDb_1.26.2
## [19] IRanges_2.24.1 S4Vectors_0.28.1
## [21] BiocGenerics_0.36.0 ape_5.4-1
## [23] ellipse_0.4.2 clustsig_1.1
## [25] knitr_1.31 directlabels_2021.1.13
## [27] vegan_2.5-7 lattice_0.20-41
## [29] permute_0.9-5 plyr_1.8.6
## [31] ggplot2_3.3.3 phyloseq_1.34.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 fastmatch_1.1-0
## [4] igraph_1.2.6 splines_4.0.3 BiocParallel_1.24.1
## [7] digest_0.6.27 foreach_1.5.1 htmltools_0.5.1.1
## [10] fansi_0.4.2 magrittr_2.0.1 memoise_2.0.0
## [13] cluster_2.1.1 DECIPHER_2.18.1 openxlsx_4.2.3
## [16] limma_3.46.0 Biostrings_2.58.0 annotate_1.68.0
## [19] RcppParallel_5.0.3 prettyunits_1.1.1 jpeg_0.1-8.1
## [22] colorspace_2.0-0 ggrepel_0.9.1 blob_1.2.1
## [25] haven_2.3.1 xfun_0.21 crayon_1.4.1
## [28] RCurl_1.98-1.2 jsonlite_1.7.2 genefilter_1.72.1
## [31] dada2_1.18.0 phangorn_2.5.5 survival_3.2-7
## [34] iterators_1.0.13 glue_1.4.2 gtable_0.3.0
## [37] zlibbioc_1.36.0 XVector_0.30.0 DelayedArray_0.16.1
## [40] car_3.0-10 Rhdf5lib_1.12.1 abind_1.4-5
## [43] scales_1.1.1 pheatmap_1.0.12 futile.options_1.0.1
## [46] DBI_1.1.1 edgeR_3.32.1 rstatix_0.7.0
## [49] Rcpp_1.0.6 xtable_1.8-4 progress_1.2.2
## [52] foreign_0.8-81 bit_4.0.4 httr_1.4.2
## [55] ellipsis_0.3.1 farver_2.0.3 pkgconfig_2.0.3
## [58] XML_3.99-0.5 sass_0.3.1 locfit_1.5-9.4
## [61] utf8_1.1.4 labeling_0.4.2 tidyselect_1.1.0
## [64] rlang_0.4.10 reshape2_1.4.4 AnnotationDbi_1.52.0
## [67] munsell_0.5.0 cellranger_1.1.0 tools_4.0.3
## [70] cachem_1.0.4 cli_2.3.1 generics_0.1.0
## [73] RSQLite_2.2.3 ade4_1.7-16 broom_0.7.5
## [76] evaluate_0.14 biomformat_1.18.0 stringr_1.4.0
## [79] fastmap_1.1.0 yaml_2.2.1 bit64_4.0.5
## [82] zip_2.1.1 randomForest_4.6-14 purrr_0.3.4
## [85] nlme_3.1-152 formatR_1.7 rstudioapi_0.13
## [88] compiler_4.0.3 curl_4.3 png_0.1-7
## [91] ggsignif_0.6.1 tibble_3.0.6 geneplotter_1.68.0
## [94] bslib_0.2.4 stringi_1.5.3 highr_0.8
## [97] forcats_0.5.1 Matrix_1.3-2 multtest_2.46.0
## [100] vctrs_0.3.6 pillar_1.5.0 lifecycle_1.0.0
## [103] rhdf5filters_1.2.0 jquerylib_0.1.3 data.table_1.14.0
## [106] bitops_1.0-6 R6_2.5.0 latticeExtra_0.6-29
## [109] hwriter_1.3.2 ShortRead_1.48.0 gridExtra_2.3
## [112] rio_0.5.16 codetools_0.2-18 lambda.r_1.2.4
## [115] MASS_7.3-53.1 assertthat_0.2.1 rhdf5_2.34.0
## [118] withr_2.4.1 GenomicAlignments_1.26.0 Rsamtools_2.6.0
## [121] GenomeInfoDbData_1.2.4 mgcv_1.8-34 hms_1.0.0
## [124] gghalves_0.1.1 quadprog_1.5-8 tidyr_1.1.2
## [127] rmarkdown_2.7 carData_3.0-4 Rtsne_0.15